#rm(list=ls(all=TRUE))

ds <- read.delim("prophecyAJPS.txt",header=TRUE)	# Read into memory a tab delimited file and assign it to a data frame ds
attach(ds)    						# make variables available by name 

assign <- NumAzar
info.describe <- ifelse(assign==1|assign==2,P75,99)

datm<-cbind(P76,P106,assign,P84,P85,P86,info.describe)
datm<-na.omit(datm)

n.sample <-nrow(datm)
n.boot <-600
index  <-1:n.sample

## prob. of 1 for randomizing device or group status indicator ##

p <- 0.2635659

## Set up function to calculate parameter values according to E-M algorithm ##

EM <- function(data,par) { 

n<-nrow(data)
Y<-data[,1]
X<-as.matrix(cbind(1,data[,2:ncol(data)]))

lamT1 <-par[1]
lamL1 <-par[2]
lamT0 <-par[3]
Beta  <-par[4:length(par)]

mu    <- (1+exp(-X%*%Beta))^-1

n.A <- sum(Y==3|Y==4)
Pr.1.3  <- as.numeric(Y>=3)*0 + as.numeric(Y==1)*((p*lamT0*(1-mu))/(p*lamT0*(1-mu) + (1-p)*lamL1*mu)) + as.numeric(Y==2)*(((1-p)*lamT0*(1-mu))/((1-p)*lamT0*(1-mu) + p*lamL1*mu))
Pr.2.4  <- as.numeric(Y>=3)*0 + as.numeric(Y<3)*(1-Pr.1.3)
Pr.5.6  <- ifelse(Y==3|Y==4,1,0)
Pr.7.9  <- as.numeric(Y<=4)*0 + as.numeric(Y==5)*((p*(1-lamT0)*(1-mu))/(p*(1-lamT0)*(1-mu) + (1-p)*(1-lamT1-lamL1)*mu)) + as.numeric(Y==6)*(((1-p)*(1-lamT0)*(1-mu))/((1-p)*(1-lamT0)*(1-mu) + p*(1-lamT1-lamL1)*mu))
Pr.8.10 <- as.numeric(Y<=4)*0 + as.numeric(Y>4)*(1-Pr.7.9)

Pr.T <- Pr.2.4 + Pr.5.6 + Pr.8.10

lamT1.j <- n.A/(n.A + sum(Pr.2.4) + sum(Pr.8.10) )
lamL1.j <- sum(Pr.2.4)/(n.A + sum(Pr.2.4) + sum(Pr.8.10) )
lamT0.j <- sum(Pr.1.3)/(sum(Pr.1.3)+sum(Pr.7.9) )
  
V <- mu*(1-mu)
X.til<-matrix(NA,nrow=nrow(X),ncol=ncol(X))
for (i in 1:n) {X.til[i,]<-V[i]*X[i,]} 

D <- Pr.T - mu

Beta.j <- Beta + solve(t(X)%*%X.til,tol=.Machine$double.eps^2)%*%t(X)%*%D

est.j <- c(lamT1.j,lamL1.j,lamT0.j,Beta.j)
est.j}

tol <- .Machine$double.eps # tolerance for stopping algorithm
M<-3000
init<-c(.2,.2,.2,0,0,0,0,0,0,0,0,0) # initial parameter estimates


LATE.T1<-rep(NA,n.boot)
LATE.T2<-rep(NA,n.boot)

for (k in 1:n.boot){

choose <-sample(index,size=n.sample,replace=T)
dat    <-datm[choose,]

info.describe<-dat[,7]
assign<-dat[,3]

Intern.C<-info.describe[assign==1]
Intern.E<-info.describe[assign==2]

received.C <-as.numeric(Intern.C==1|Intern.C==2)
received.E <-as.numeric(Intern.E==3|Intern.E==4)

prop.rec.C <-mean(received.C)
prop.rec.E <-mean(received.E)

Y <- 1*as.numeric(dat[,1]==2&dat[,2]==2) + 2*as.numeric(dat[,1]==1&dat[,2]==2) + 3*as.numeric(dat[,1]==2&dat[,2]==1) + 4*as.numeric(dat[,1]==1&dat[,2]==1) + 5*as.numeric(dat[,1]==2&dat[,2]==3|dat[,1]==2&dat[,2]==99) + 6*as.numeric(dat[,1]==1&dat[,2]==3|dat[,1]==1&dat[,2]==99)
X <- dat[Y!=0,3:6]
corrupt.treat <- as.numeric(X[,1]==1) 
efficiency.treat <- as.numeric(X[,1]==2) 
male <- as.numeric(X[,2]==1) 
log.age <- log(X[,3])
primary.edu   <- as.numeric(X[,4]==1|X[,4]==2|X[,4]==3)
high.sch.drop <- as.numeric(X[,4]==4)
high.sch.grad <- as.numeric(X[,4]==5)
technical.edu <- as.numeric(X[,4]==6|X[,4]==7)


Y <- Y[Y!=0]
dta<-cbind(Y,corrupt.treat,efficiency.treat,male,log.age,primary.edu,high.sch.drop,high.sch.grad,technical.edu)

pars.old <- EM(dta,init)

for (j in 1:M){

pars.new <- EM(dta,pars.old)
abs.diff <- abs(pars.new-pars.old)
if (max(abs.diff) < tol ) break
pars.old <- pars.new}


EM.est<-pars.new
Beta.mle  <-pars.new[4:length(EM.est)]
X.boot<-dta[,4:ncol(dta)]

Xb.corr  <-   as.matrix(cbind(1,1,0,X.boot))
Xb.ineff <-   as.matrix(cbind(1,0,1,X.boot))
Xb.cont  <-   as.matrix(cbind(1,0,0,X.boot))

yboot.corr <- (1+exp(-Xb.corr%*%Beta.mle))^-1
yboot.ineff <- (1+exp(-Xb.ineff%*%Beta.mle))^-1
yboot.cont <- (1+exp(-Xb.cont%*%Beta.mle))^-1

muboot.corr  <- mean(yboot.corr)
muboot.ineff <- mean(yboot.ineff)
muboot.cont  <- mean(yboot.cont)

LATE.T1[k] <- (muboot.corr-muboot.cont)/prop.rec.C
LATE.T2[k] <- (muboot.ineff-muboot.cont)/prop.rec.E

}

mu.LATE.T1 <- mean(LATE.T1)
mu.LATE.T2 <- mean(LATE.T2)

SE.LATE.T1 <- sd(LATE.T1)
SE.LATE.T2 <- sd(LATE.T2)

conf.T1 <- quantile(LATE.T1,c(0.025,0.975))
conf.T2 <- quantile(LATE.T2,c(0.025,0.975))

LATE.corruption <- c(mu.LATE.T1,SE.LATE.T1,conf.T1)
names(LATE.corruption) <- c("estimate","SE","2.5%","97.5%")

LATE.inefficiency <- c(mu.LATE.T2,SE.LATE.T2,conf.T2)
names(LATE.inefficiency) <- c("estimate","SE","2.5%","97.5%")

print(list(LATE.estimate.corruption=LATE.corruption,LATE.estimate.inefficiency=LATE.inefficiency))
